#ifndef __SALMON_UTILS_HPP__
#define __SALMON_UTILS_HPP__

extern "C" {
#include "io_lib/os.h"
#include "io_lib/scram.h"
#undef min
#undef max
}

#include <algorithm>
#include <boost/filesystem.hpp>
#include <boost/program_options.hpp>
#include <iostream>
#include <memory>
#include <random>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <Eigen/Dense>

#include "oneapi/tbb/task_arena.h"

#include "cereal/archives/json.hpp"
#include "spdlog/fmt/fmt.h"

#include "SalmonMath.hpp"
#include "SalmonOpts.hpp"

#include "GenomicFeature.hpp"
#include "LibraryFormat.hpp"
#include "pufferfish/Util.hpp"
#include "ReadLibrary.hpp"
#include "SalmonConfig.hpp"
#include "TranscriptGeneMap.hpp"

template <typename EqBuilderT> class ReadExperiment;
class LibraryFormat;
class FragmentLengthDistribution;
class Transcript;

namespace salmon {
namespace utils {

using std::string;
using NameVector = std::vector<string>;
using IndexVector = std::vector<size_t>;
using KmerVector = std::vector<uint64_t>;
using MateStatus = pufferfish::util::MateStatus;

// Keep track of the type of mapping that was obtained for this read
enum class MappingType : uint8_t {
  UNMAPPED = 0,
  LEFT_ORPHAN = 1,
  RIGHT_ORPHAN = 2,
  BOTH_ORPHAN = 3,
  PAIRED_MAPPED = 4,
  SINGLE_MAPPED = 5,
  DECOY = 6
};

enum class DuplicateTargetStatus : uint8_t { 
  UNKNOWN = 0, 
  RETAINED_DUPLICATES = 1, 
  REMOVED_DUPLICATES = 2 
};

std::string str(const MappingType& mt);

// To keep track of short fragments (shorter than the k-mer length)
// on which the index was built.
struct ShortFragStats {
  size_t numTooShort{0};
  size_t shortest{std::numeric_limits<size_t>::max()};
};

// An enum class for direction to avoid potential errors
// with keeping everything as a bool
enum class Direction { FORWARD = 0, REVERSE_COMPLEMENT = 1, REVERSE = 2 };

// Returns FORWARD if isFwd is true and REVERSE_COMPLEMENT otherwise
constexpr inline Direction boolToDirection(bool isFwd) {
  return isFwd ? Direction::FORWARD : Direction::REVERSE_COMPLEMENT;
}

// Returns a uint64_t where the upper 32-bits
// contain tid and the lower 32-bits contain offset
uint64_t encode(uint64_t tid, uint64_t offset);

// Given a uin64_t generated by encode(), return the
// transcript id --- upper 32-bits
uint32_t transcript(uint64_t enc);

// Given a uin64_t generated by encode(), return the
// offset --- lower 32-bits
uint32_t offset(uint64_t enc);

LibraryFormat parseLibraryFormatStringNew(std::string& fmt);

std::vector<ReadLibrary>
extractReadLibraries(boost::program_options::parsed_options& orderedOptions);

LibraryFormat parseLibraryFormatString(std::string& fmt);

bool peekBAMIsPaired(const boost::filesystem::path& fname);

size_t numberOfReadsInFastaFile(const std::string& fname);

bool readKmerOrder(const std::string& fname, std::vector<uint64_t>& kmers);

template <template <typename> class S, typename T>
bool overlap(const S<T>& a, const S<T>& b);

template <typename T>
TranscriptGeneMap
transcriptToGeneMapFromFeatures(std::vector<GenomicFeature<T>>& feats);

TranscriptGeneMap transcriptGeneMapFromGTF(const std::string& fname,
                                           std::string key = "gene_id");

TranscriptGeneMap readTranscriptToGeneMap(std::ifstream& ifile);

TranscriptGeneMap
transcriptToGeneMapFromFasta(const std::string& transcriptsFile);

bool readEquivCounts(boost::filesystem::path& eqFilePathString,
                     std::vector<string>& tnames,
                     std::vector<double>& tlens,
                     std::vector<std::vector<uint32_t>>& eqclasses,
                     std::vector<std::vector<double>>& auxs_vals,
                     std::vector<uint32_t>& eqclass_counts );

/**
 * @param sopt : The salmon options object that tells us the relevant files and contains the pointer 
 *              to the logger object
 * @param transcripts : The list of transcript objects
 * 
 * If the auxTargetFile is not empty (i.e. if the file exists)
 **/
void markAuxiliaryTargets(SalmonOpts& sopt, std::vector<Transcript>& transcripts);

/*
template <typename AbundanceVecT, typename ReadExpT>
Eigen::VectorXd updateEffectiveLengths(
        SalmonOpts& sopt,
        ReadExpT& readExp,
        Eigen::VectorXd& effLensIn,
        AbundanceVecT& alphas,
    bool finalRound = false);
*/

template <typename AbundanceVecT, typename ReadExpT>
Eigen::VectorXd
updateEffectiveLengths(oneapi::tbb::task_arena& arena, SalmonOpts& sopt, ReadExpT& readExp,
                       Eigen::VectorXd& effLensIn, AbundanceVecT& alphas,
                       std::vector<bool>& available, bool finalRound = false);

/*
 * Use atomic compare-and-swap to update val to
 * val + inc (*in log-space*).  Update occurs in a loop in case other
 * threads update in the meantime.
 */
inline void incLoopLog(std::atomic<double>& val, double inc) {
  double oldMass = val.load();
  double newMass;
  do {
    newMass = salmon::math::logAdd(oldMass, inc);
  } while (! val.compare_exchange_strong(oldMass, newMass));
}

/*
 * Same as above, but overloaded for "plain" doubles
 */
inline void incLoop(double& val, double inc) { val += inc; }

/*
 * Use atomic compare-and-swap to update val to
 * val + inc.  Update occurs in a loop in case other
 * threads update in the meantime.
 */

inline void incLoop(std::atomic<double>& val, double inc) {
  double oldMass = val.load();
  double newMass;
  do {
    newMass = oldMass + inc;
  } while (!val.compare_exchange_strong(oldMass, newMass));
}

std::string getCurrentTimeAsString();

// encodes the heuristic for guessing how threads should
// be allocated based on the available reads
// returns true if input was modified and false otherwise.
bool configure_parsing(size_t nfiles,            // input param
                       size_t& worker_threads,   // input/output param
                       uint32_t& parse_threads    // input/output param
);

bool validateOptionsAlignment_(SalmonOpts& sopt);
bool validateOptionsMapping_(SalmonOpts& sopt);

bool createAuxMapLoggers_(SalmonOpts& sopt,
                          boost::program_options::variables_map& vm);

bool processQuantOptions(SalmonOpts& sopt,
                         boost::program_options::variables_map& vm,
                         int32_t numBiasSamples);

template <typename OrderedOptionsT>
bool writeCmdInfo(SalmonOpts& sopt, OrderedOptionsT& orderedOptions) {
  namespace bfs = boost::filesystem;
  bfs::path cmdInfoPath = sopt.outputDirectory / "cmd_info.json";
  std::ofstream os(cmdInfoPath.string());
  cereal::JSONOutputArchive oa(os);
  oa(cereal::make_nvp("salmon_version", std::string(salmon::version)));
  for (auto& opt : orderedOptions.options) {
    if (opt.value.size() == 1) {
      oa(cereal::make_nvp(opt.string_key, opt.value.front()));
    } else {
      oa(cereal::make_nvp(opt.string_key, opt.value));
    }
  }
  // explicitly ouput the aux directory as well
  oa(cereal::make_nvp("auxDir", sopt.auxDir));
  return true;
}

void aggregateEstimatesToGeneLevel(TranscriptGeneMap& tgm,
                                   boost::filesystem::path& inputPath);

// NOTE: Throws an invalid_argument exception of the quant or
// quant_bias_corrected files do not exist!
void generateGeneLevelEstimates(boost::filesystem::path& geneMapPath,
                                boost::filesystem::path& estDir);

enum class OrphanStatus : uint8_t {
  LeftOrphan = 0,
  RightOrphan = 1,
  Paired = 2
};

bool headersAreConsistent(SAM_hdr* h1, SAM_hdr* h2);

bool headersAreConsistent(std::vector<SAM_hdr*>&& headers);

inline void reverseComplement(const char* s, int32_t l, std::string& o) {
  if (static_cast<decltype(o.size())>(l) > o.size()) {
    o.resize(l, 'A');
  }
  int32_t j = 0;
  for (int32_t i = l - 1; i >= 0; --i, ++j) {
    switch (s[i]) {
    case 'A':
    case 'a':
      o[j] = 'T';
      break;
    case 'C':
    case 'c':
      o[j] = 'G';
      break;
    case 'T':
    case 't':
      o[j] = 'A';
      break;
    case 'G':
    case 'g':
      o[j] = 'C';
      break;
    default:
      o[j] = 'N';
      break;
    }
  }
}

inline std::string reverseComplement(const char* s, int32_t l) {
  std::string o(l, 'A');
  reverseComplement(s, l, o);
  return o;
}

template <typename AlnLibT>
void writeAbundances(const SalmonOpts& sopt, AlnLibT& alnLib,
                     boost::filesystem::path& fname,
                     std::string headerComments = "");

template <typename AlnLibT>
void writeAbundancesFromCollapsed(const SalmonOpts& sopt, AlnLibT& alnLib,
                                  boost::filesystem::path& fname,
                                  std::string headerComments = "");

template <typename AlnLibT>
void normalizeAlphas(const SalmonOpts& sopt, AlnLibT& alnLib);

bool isCompatible(const LibraryFormat observed, const LibraryFormat expected,
                  int32_t start, bool isForward, MateStatus ms);

double logAlignFormatProb(const LibraryFormat observed,
                          const LibraryFormat expected, int32_t start,
                          bool isForward, MateStatus ms,
                          double incompatPrior);

bool compatibleHit(const LibraryFormat expected, int32_t start, bool isForward,
                   MateStatus ms);
bool compatibleHit(const LibraryFormat expected, const LibraryFormat observed);

std::ostream& operator<<(std::ostream& os, OrphanStatus s);
/**
 *  Given the information about the position and strand from which a paired-end
 *  read originated, return the library format with which it is compatible.
 */
LibraryFormat hitType(int32_t end1Start, bool end1Fwd, int32_t end2Start,
                      bool end2Fwd);
LibraryFormat hitType(int32_t end1Start, bool end1Fwd, uint32_t len1,
                      int32_t end2Start, bool end2Fwd, uint32_t len2,
                      bool canDovetail);
/**
 *  Given the information about the position and strand from which the
 *  single-end read originated, return the library format with which it
 *  is compatible.
 */
LibraryFormat hitType(int32_t readStart, bool isForward);

double compute_1_edit_threshold(int32_t l, const SalmonOpts& sopt);

//std::random_device get_random_device();

/**
 *  Cache the mappings provided in an efficient binary format
 *  to the provided file handle.
 *  NOTE: This function assumes the file handle is unique to
 *  the calling thread, no attempt is made to synchronize
 *  output between multiple threads.
 */
  //template <typename AlnT>
  //using AlnGroupVecRange = core::range<typename AlnGroupVec<AlnT>::iterator>;
  //void cacheMappings(AlnGroupVecRange<QuasiAlignment> hits, std::ofstream& ofile);


} // namespace utils
} // namespace salmon

#endif // __SALMON_UTILS_HPP__
